Load R libraries
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(mice)
## Loading required package: lattice
library(miceadds)
## * miceadds 2.9-15 (2017-12-18 11:50:04)
library(ggplot2)
library(lattice)
Load vehicle training dataset without missing target.
# Work from the project root so the relative paths used below resolve.
setwd("/Users/davidleonardi/Projects/KE5107_BayesianNetworks/")

# Read the training set and convert the categorical columns to factors in a
# single pass; every other column keeps the type read.csv() gave it.
# NOTE(review): the summary below shows a blank level in OA_SEX and VE_GAD1 --
# presumably missing values stored as empty strings. Confirm whether they
# should be read as NA (e.g. via `na.strings`) so that they get imputed too.
training_data <- local({
  vehicles <- read.csv("./Data/vehicle_safety_training_data.csv")
  categorical <- c(
    "GV_LANES", "GV_MODELYR", "GV_WGTCDTR", "OA_BAGDEPLY",
    "OA_MAIS", "OA_MANUSE", "OA_SEX", "VE_GAD1"
  )
  vehicles[categorical] <- lapply(vehicles[categorical], as.factor)
  vehicles
})
summary(training_data)
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY
## Min. : 670 Min. :-114.000 Min. :-145.00 Min. : 4.0
## 1st Qu.:1360 1st Qu.: -6.000 1st Qu.: -23.00 1st Qu.: 153.5
## Median :1530 Median : 0.000 Median : -15.00 Median : 303.0
## Mean :1618 Mean : 0.019 Mean : -14.78 Mean : 498.3
## 3rd Qu.:1830 3rd Qu.: 6.000 3rd Qu.: -8.00 3rd Qu.: 598.5
## Max. :4310 Max. : 118.000 Max. : 84.00 Max. :9852.0
## NA's :35 NA's :4998 NA's :4998 NA's :4998
## GV_LANES GV_MODELYR GV_OTVEHWGT GV_SPLIMIT
## 2 :6174 2000 :2274 Min. : 640 Min. : 0.00
## 4 :3373 2002 :2270 1st Qu.:1340 1st Qu.:35.00
## 3 :2996 2001 :2176 Median :1550 Median :40.00
## 5 :2578 2003 :2015 Mean :1630 Mean :40.71
## 6 : 559 2004 :1732 3rd Qu.:1840 3rd Qu.:45.00
## (Other): 662 2005 :1643 Max. :4540 Max. :75.00
## NA's : 3 (Other):4235 NA's :1702 NA's :195
## GV_WGTCDTR OA_AGE OA_BAGDEPLY
## Passenger Car :10040 Min. : 7.00 Deployed :7652
## Truck (<=10000 lbs.): 2031 1st Qu.:25.00 Not Deployed:8693
## Truck (<=6000 lbs.) : 4274 Median :37.00
## Mean :40.24
## 3rd Qu.:52.00
## Max. :97.00
## NA's :13
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT
## Min. : 59.0 0:6223 0 : 1854 : 181 Min. : 31.0
## 1st Qu.:163.0 1:7559 1 :14162 Female:8068 1st Qu.: 64.0
## Median :170.0 2:1272 NA's: 329 Male :8096 Median : 77.0
## Mean :170.8 3: 796 Mean : 78.7
## 3rd Qu.:178.0 4: 246 3rd Qu.: 91.0
## Max. :216.0 5: 177 Max. :150.0
## NA's :1798 6: 72 NA's :1715
## VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## : 656 Min. :105.0 Min. :141.0 Min. : 5.0
## Front:9549 1st Qu.:149.0 1st Qu.:262.0 1st Qu.:115.0
## Left :2508 Median :154.0 Median :272.0 Median :135.0
## Rear :1383 Mean :154.7 Mean :281.1 Mean :152.4
## Right:2249 3rd Qu.:158.0 3rd Qu.:288.0 3rd Qu.:175.0
## Max. :185.0 Max. :481.0 Max. :355.0
## NA's :180 NA's :8 NA's :1598
## GV_FOOTPRINT
## Min. :2.468
## 1st Qu.:3.925
## Median :4.200
## Mean :4.363
## 3rd Qu.:4.552
## Max. :7.795
## NA's :184
Show missing data in diagram.
# Draw the missingness map over every column. The data has no leading id
# column, so nothing is dropped: GV_CURBWGT (the first column, which has
# missing values of its own) must appear in the map as well.
missmap(training_data, col = c("grey", "steelblue"), y.cex = 0.5, x.cex = 0.8)
Get hard numbers of missing data.
# Count the missing cells in each column and list the columns from the most
# to the least affected.
sort(
  vapply(training_data, function(column) sum(is.na(column)), integer(1)),
  decreasing = TRUE
)
## GV_DVLAT GV_DVLONG GV_ENERGY OA_HEIGHT OA_WEIGHT
## 4998 4998 4998 1798 1715
## GV_OTVEHWGT VE_PDOF_TR OA_MANUSE GV_SPLIMIT GV_FOOTPRINT
## 1702 1598 329 195 184
## VE_ORIGAVTW GV_CURBWGT OA_AGE VE_WHEELBAS GV_LANES
## 180 35 13 8 3
## GV_MODELYR GV_WGTCDTR OA_BAGDEPLY OA_MAIS OA_SEX
## 0 0 0 0 0
## VE_GAD1
## 0
Show missing data pattern.
# Tabulate the distinct missing-data patterns found in the training data.
md.pattern(x = training_data)
## GV_MODELYR GV_WGTCDTR OA_BAGDEPLY OA_MAIS OA_SEX VE_GAD1 GV_LANES
## 9309 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 0
## 211 1 1 1 1 1 1 1
## 82 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 113 1 1 1 1 1 1 1
## 46 1 1 1 1 1 1 1
## 59 1 1 1 1 1 1 1
## 332 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 947 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 64 1 1 1 1 1 1 1
## 2057 1 1 1 1 1 1 1
## 21 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 42 1 1 1 1 1 1 1
## 19 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 0
## 1041 1 1 1 1 1 1 1
## 30 1 1 1 1 1 1 1
## 26 1 1 1 1 1 1 1
## 56 1 1 1 1 1 1 1
## 10 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 831 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1
## 19 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 212 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 182 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 19 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 33 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 131 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 59 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 65 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 10 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 11 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 15 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 22 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 3
## VE_WHEELBAS OA_AGE GV_CURBWGT VE_ORIGAVTW GV_FOOTPRINT GV_SPLIMIT
## 9309 1 1 1 1 1 1
## 5 1 1 0 1 1 1
## 2 1 1 1 1 1 1
## 211 1 1 1 1 1 1
## 82 1 1 1 1 1 0
## 1 1 0 1 1 1 1
## 113 1 1 1 1 1 1
## 46 1 1 1 1 1 1
## 59 1 1 1 1 1 1
## 332 1 1 1 1 1 1
## 1 1 0 1 1 1 1
## 4 1 1 1 1 1 1
## 1 1 1 1 1 1 0
## 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1
## 2 1 1 1 1 1 0
## 947 1 1 1 1 1 1
## 6 1 1 1 1 1 0
## 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 7 1 1 1 1 1 1
## 64 1 1 1 0 0 1
## 2057 1 1 1 1 1 1
## 21 1 1 1 1 1 1
## 20 1 1 1 1 1 0
## 4 1 0 1 1 1 1
## 42 1 1 1 1 1 1
## 19 1 1 1 1 1 1
## 4 1 1 1 0 0 1
## 1 1 1 1 0 0 0
## 3 1 1 1 0 0 1
## 1 1 1 1 0 0 1
## 1 0 1 0 1 0 1
## 6 1 1 1 0 0 1
## 13 1 1 0 1 1 1
## 1 1 1 1 1 1 1
## 1041 1 1 1 1 1 1
## 30 1 1 1 1 1 0
## 26 1 1 1 1 1 1
## 56 1 1 1 1 1 1
## 10 1 1 1 1 1 1
## 2 1 1 1 1 1 0
## 1 1 0 1 1 1 1
## 831 1 1 1 1 1 1
## 4 1 1 1 1 1 1
## 12 1 1 1 0 0 1
## 3 1 1 0 1 1 1
## 2 1 1 0 1 1 0
## 8 1 1 1 1 1 0
## 13 1 1 1 1 1 1
## 19 1 1 1 1 1 1
## 1 1 1 1 1 1 0
## 6 1 1 1 1 1 1
## 1 1 1 0 1 1 1
## 7 1 1 1 1 1 1
## 212 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 3 1 1 0 1 1 1
## 182 1 1 1 1 1 1
## 24 1 1 1 1 1 0
## 3 1 1 1 1 1 1
## 19 1 1 1 1 1 1
## 6 1 1 1 1 1 1
## 33 1 1 1 0 0 1
## 1 1 0 1 0 0 1
## 2 1 1 1 0 0 1
## 2 0 1 1 1 0 1
## 2 1 1 0 1 1 1
## 131 1 1 1 1 1 1
## 2 1 1 1 1 1 0
## 3 1 0 1 1 1 1
## 59 1 1 1 1 1 1
## 2 1 1 1 1 1 0
## 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1
## 1 1 1 1 1 1 0
## 2 1 1 1 1 1 1
## 65 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 10 1 1 1 0 0 1
## 1 1 1 1 0 0 0
## 2 1 1 1 0 0 1
## 1 0 1 0 1 0 1
## 1 0 1 1 0 0 1
## 16 1 1 1 0 0 1
## 1 1 1 1 0 0 1
## 1 1 1 0 1 1 1
## 11 1 1 1 1 1 1
## 4 1 1 1 1 1 0
## 1 1 0 1 1 1 1
## 1 1 1 1 1 1 0
## 1 1 1 0 1 1 1
## 15 1 1 1 1 1 1
## 2 1 1 1 1 1 0
## 1 1 0 1 1 1 1
## 22 1 1 1 1 1 1
## 1 1 1 1 0 0 0
## 3 1 1 1 0 0 1
## 1 0 1 0 0 0 1
## 1 0 1 1 0 0 1
## 6 1 1 1 0 0 1
## 3 1 1 1 0 0 1
## 2 1 1 1 1 1 1
## 1 1 1 1 1 1 0
## 2 1 1 1 0 0 1
## 1 1 1 0 0 0 1
## 1 1 1 1 0 0 1
## 1 1 1 1 1 1 0
## 1 1 1 1 0 0 1
## 1 0 1 1 0 0 1
## 1 1 1 1 0 0 1
## 8 13 35 180 184 195
## OA_MANUSE VE_PDOF_TR GV_OTVEHWGT OA_WEIGHT OA_HEIGHT GV_DVLAT
## 9309 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 2 1 1 1 1 1 1
## 211 1 1 0 1 1 1
## 82 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 113 1 1 1 1 0 1
## 46 0 1 1 1 1 1
## 59 1 1 1 0 1 1
## 332 1 0 1 1 1 1
## 1 1 1 0 1 1 1
## 4 1 1 0 1 0 1
## 1 1 1 1 1 0 1
## 1 0 1 0 1 1 1
## 3 0 1 1 1 0 1
## 2 1 1 1 0 1 1
## 947 1 1 1 0 0 1
## 6 1 0 1 1 1 1
## 1 1 0 1 1 0 1
## 5 0 0 1 1 1 1
## 7 1 0 1 0 1 1
## 64 1 1 1 1 1 1
## 2057 1 1 1 1 1 0
## 21 1 1 0 0 0 1
## 20 1 1 1 0 0 1
## 4 1 1 1 0 0 1
## 42 0 1 1 0 0 1
## 19 1 0 1 0 0 1
## 4 1 1 0 1 1 1
## 1 1 1 1 1 1 1
## 3 1 1 1 1 0 1
## 1 0 1 1 1 1 1
## 1 1 1 1 1 1 1
## 6 1 0 1 1 1 1
## 13 1 1 1 1 1 0
## 1 1 1 1 1 1 0
## 1041 1 1 0 1 1 0
## 30 1 1 1 1 1 0
## 26 1 1 1 1 0 0
## 56 0 1 1 1 1 0
## 10 1 1 1 0 1 0
## 2 0 1 1 0 0 1
## 1 0 1 1 0 0 1
## 831 1 0 1 1 1 0
## 4 0 0 1 0 0 1
## 12 1 1 1 0 0 1
## 3 1 1 0 1 1 0
## 2 1 1 1 1 1 0
## 8 1 1 0 1 1 0
## 13 1 1 0 1 0 0
## 19 0 1 0 1 1 0
## 1 0 1 1 1 1 0
## 6 0 1 1 1 0 0
## 1 1 1 1 0 1 0
## 7 1 1 0 0 1 0
## 212 1 1 1 0 0 0
## 1 0 1 1 0 1 0
## 3 1 0 1 1 1 0
## 182 1 0 0 1 1 0
## 24 1 0 1 1 1 0
## 3 1 0 1 1 0 0
## 19 0 0 1 1 1 0
## 6 1 0 1 0 1 0
## 33 1 1 1 1 1 0
## 1 1 1 1 0 0 1
## 2 0 1 1 0 0 1
## 2 1 1 1 1 1 0
## 2 1 1 1 0 0 0
## 131 1 1 0 0 0 0
## 2 1 1 1 0 0 0
## 3 1 1 1 0 0 0
## 59 0 1 1 0 0 0
## 2 1 0 0 1 1 0
## 1 1 0 0 1 0 0
## 3 0 0 0 1 1 0
## 1 0 0 1 1 1 0
## 2 0 0 1 1 0 0
## 65 1 0 1 0 0 0
## 1 0 0 1 0 1 0
## 10 1 1 0 1 1 0
## 1 1 1 1 1 1 0
## 2 0 1 1 1 1 0
## 1 1 1 1 1 1 0
## 1 1 1 1 1 1 0
## 16 1 0 1 1 1 0
## 1 0 0 1 0 0 1
## 1 0 1 1 0 0 0
## 11 0 1 0 0 0 0
## 4 0 1 1 0 0 0
## 1 0 1 1 0 0 0
## 1 0 0 1 1 0 0
## 1 1 0 1 0 0 0
## 15 1 0 0 0 0 0
## 2 1 0 1 0 0 0
## 1 1 0 1 0 0 0
## 22 0 0 1 0 0 0
## 1 1 1 0 1 1 0
## 3 1 1 1 0 0 0
## 1 1 1 1 1 1 0
## 1 0 1 1 1 1 0
## 6 1 0 0 1 1 0
## 3 0 0 1 1 1 0
## 2 0 0 0 0 0 0
## 1 0 0 1 0 0 0
## 2 1 1 0 0 0 0
## 1 1 0 0 1 1 0
## 1 1 0 1 0 0 0
## 1 0 0 0 0 0 0
## 1 0 1 0 0 0 0
## 1 0 1 1 0 0 0
## 1 0 0 1 0 0 0
## 329 1598 1702 1715 1798 4998
## GV_DVLONG GV_ENERGY
## 9309 1 1 0
## 5 1 1 1
## 2 1 1 1
## 211 1 1 1
## 82 1 1 1
## 1 1 1 1
## 113 1 1 1
## 46 1 1 1
## 59 1 1 1
## 332 1 1 1
## 1 1 1 2
## 4 1 1 2
## 1 1 1 2
## 1 1 1 2
## 3 1 1 2
## 2 1 1 2
## 947 1 1 2
## 6 1 1 2
## 1 1 1 2
## 5 1 1 2
## 7 1 1 2
## 64 1 1 2
## 2057 0 0 3
## 21 1 1 3
## 20 1 1 3
## 4 1 1 3
## 42 1 1 3
## 19 1 1 3
## 4 1 1 3
## 1 1 1 3
## 3 1 1 3
## 1 1 1 3
## 1 1 1 3
## 6 1 1 3
## 13 0 0 4
## 1 0 0 4
## 1041 0 0 4
## 30 0 0 4
## 26 0 0 4
## 56 0 0 4
## 10 0 0 4
## 2 1 1 4
## 1 1 1 4
## 831 0 0 4
## 4 1 1 4
## 12 1 1 4
## 3 0 0 5
## 2 0 0 5
## 8 0 0 5
## 13 0 0 5
## 19 0 0 5
## 1 0 0 5
## 6 0 0 5
## 1 0 0 5
## 7 0 0 5
## 212 0 0 5
## 1 0 0 5
## 3 0 0 5
## 182 0 0 5
## 24 0 0 5
## 3 0 0 5
## 19 0 0 5
## 6 0 0 5
## 33 0 0 5
## 1 1 1 5
## 2 1 1 5
## 2 0 0 5
## 2 0 0 6
## 131 0 0 6
## 2 0 0 6
## 3 0 0 6
## 59 0 0 6
## 2 0 0 6
## 1 0 0 6
## 3 0 0 6
## 1 0 0 6
## 2 0 0 6
## 65 0 0 6
## 1 0 0 6
## 10 0 0 6
## 1 0 0 6
## 2 0 0 6
## 1 0 0 6
## 1 0 0 6
## 16 0 0 6
## 1 1 1 6
## 1 0 0 7
## 11 0 0 7
## 4 0 0 7
## 1 0 0 7
## 1 0 0 7
## 1 0 0 7
## 15 0 0 7
## 2 0 0 7
## 1 0 0 7
## 22 0 0 7
## 1 0 0 7
## 3 0 0 7
## 1 0 0 7
## 1 0 0 7
## 6 0 0 7
## 3 0 0 7
## 2 0 0 8
## 1 0 0 8
## 2 0 0 8
## 1 0 0 8
## 1 0 0 8
## 1 0 0 9
## 1 0 0 9
## 1 0 0 9
## 1 0 0 9
## 4998 4998 22754
Use mice with Decision Tree to impute missing data.
# Single imputation (m = 1) of the training data, using CART for every column.
# NOTE(review): no `seed` is supplied, so a re-run will not reproduce the same
# imputed values -- consider fixing one.
imp.data <- mice(
  data = training_data,
  m = 1,
  method = "cart",
  printFlag = FALSE
)
summary(imp.data)
## Multiply imputed data set
## Call:
## mice(data = training_data, m = 1, method = "cart", printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 35 4998 4998 4998 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1702 195 0 13
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 1798 0 329 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 1715 0 180 8 1598
## GV_FOOTPRINT
## 184
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "cart" "cart" "cart" "cart" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "cart" "cart" "cart" "cart" "cart"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "cart" "cart" "cart" "cart" "cart"
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "cart" "cart" "cart" "cart" "cart"
## GV_FOOTPRINT
## "cart"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
Write the imputed values from the decision tree model.
# Export the CART imputation; the files are written into a "mice_imp_cart"
# directory under the current working directory.
write.mice.imputation(
  mi.res = imp.data,
  name = "mice_imp_cart"
)
## Warning in dir.create(pf.subf): '/Users/davidleonardi/Projects/
## KE5107_BayesianNetworks/mice_imp_cart' already exists
## 2018-03-19 13:15:20
##
## /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_cart
##
## Multiply imputed data set
## Call:
## mice(data = training_data, m = 1, method = "cart", printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 35 4998 4998 4998 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1702 195 0 13
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 1798 0 329 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 1715 0 180 8 1598
## GV_FOOTPRINT
## 184
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "cart" "cart" "cart" "cart" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "cart" "cart" "cart" "cart" "cart"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "cart" "cart" "cart" "cart" "cart"
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "cart" "cart" "cart" "cart" "cart"
## GV_FOOTPRINT
## "cart"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
## NULL
##
##
##
## To cite R in publications use:
##
## R Core Team (2017). R: A language and environment for
## statistical computing. R Foundation for Statistical Computing,
## Vienna, Austria. URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2017},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.
##
##
## To cite mice in publications use:
##
## Stef van Buuren, Karin Groothuis-Oudshoorn (2011). mice:
## Multivariate Imputation by Chained Equations in R. Journal of
## Statistical Software, 45(3), 1-67. URL
## http://www.jstatsoft.org/v45/i03/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {{mice}: Multivariate Imputation by Chained Equations in R},
## author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {45},
## number = {3},
## pages = {1--67},
## url = {http://www.jstatsoft.org/v45/i03/},
## }
##
## sysname
## "Darwin"
## release
## "17.4.0"
## version
## "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64"
## nodename
## "Davids-MacBook-Pro-2"
## machine
## "x86_64"
## login
## "davidleonardi"
## user
## "davidleonardi"
## effective_user
## "davidleonardi"
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_2.2.1 miceadds_2.9-15 mice_2.46.0 lattice_0.20-35
## [5] Amelia_1.7.4 Rcpp_0.12.15
##
## loaded via a namespace (and not attached):
## [1] lavaan_0.5-23.1097 mitools_2.3 splines_3.4.3
## [4] colorspace_1.3-2 htmltools_0.3.6 stats4_3.4.3
## [7] yaml_2.1.16 mgcv_1.8-22 rlang_0.1.6
## [10] survival_2.41-3 pillar_1.1.0 nloptr_1.0.4
## [13] foreign_0.8-69 lavaan.survey_1.1.3.1 plyr_1.8.4
## [16] mirt_1.26.3 GPArotation_2014.11-1 stringr_1.2.0
## [19] munsell_0.4.3 CDM_6.1-10 gtable_0.2.0
## [22] mvtnorm_1.0-6 coda_0.19-1 evaluate_0.10.1
## [25] knitr_1.18 permute_0.9-4 sirt_2.5-45
## [28] parallel_3.4.3 backports_1.1.2 scales_0.5.0
## [31] vegan_2.4-6 lme4_1.1-15 polycor_0.7-9
## [34] mnormt_1.5-5 digest_0.6.14 stringi_1.1.6
## [37] survey_3.33 grid_3.4.3 rprojroot_1.3-2
## [40] quadprog_1.5-5 tools_3.4.3 magrittr_1.5
## [43] lazyeval_0.2.1 tibble_1.4.1 cluster_2.0.6
## [46] pbivnorm_0.6.0 TAM_2.9-35 MASS_7.3-47
## [49] Matrix_1.2-12 minqa_1.2.4 rmarkdown_1.8
## [52] rpart_4.1-12 sfsmisc_1.1-2 nnet_7.3-12
## [55] nlme_3.1-131 compiler_3.4.3
##
## 1
## Data values written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_cart/mice_imp_cart__SPSS.txt
## Syntax file written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_cart/mice_imp_cart__SPSS.sps
Compare it with mean imputation (numeric columns are filled with the column mean; categorical columns still use CART).
# Baseline single imputation: the first default method ("mean") is the one
# applied to numeric columns, while the factor columns fall back to "cart".
# NOTE(review): no `seed` is supplied, so the CART-imputed columns are not
# reproducible between runs.
imp.data_raw_mean <- mice(
  data = training_data,
  m = 1,
  defaultMethod = c("mean", "cart", "cart", "cart"),
  printFlag = FALSE
)
summary(imp.data_raw_mean)
## Multiply imputed data set
## Call:
## mice(data = training_data, m = 1, defaultMethod = c("mean", "cart",
## "cart", "cart"), printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 35 4998 4998 4998 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1702 195 0 13
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 1798 0 329 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 1715 0 180 8 1598
## GV_FOOTPRINT
## 184
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "mean" "mean" "mean" "mean" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "" "mean" "mean" "" "mean"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "" "mean" "" "cart" ""
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "mean" "" "mean" "mean" "mean"
## GV_FOOTPRINT
## "mean"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
Write the imputed values from the mean model.
# Export the mean-based imputation; the files are written into a
# "mice_imp_mean" directory under the current working directory.
write.mice.imputation(
  mi.res = imp.data_raw_mean,
  name = "mice_imp_mean"
)
## Warning in dir.create(pf.subf): '/Users/davidleonardi/Projects/
## KE5107_BayesianNetworks/mice_imp_mean' already exists
## 2018-03-19 13:16:05
##
## /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_mean
##
## Multiply imputed data set
## Call:
## mice(data = training_data, m = 1, defaultMethod = c("mean", "cart",
## "cart", "cart"), printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 35 4998 4998 4998 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1702 195 0 13
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 1798 0 329 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 1715 0 180 8 1598
## GV_FOOTPRINT
## 184
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "mean" "mean" "mean" "mean" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "" "mean" "mean" "" "mean"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "" "mean" "" "cart" ""
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "mean" "" "mean" "mean" "mean"
## GV_FOOTPRINT
## "mean"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
## NULL
##
##
##
## To cite R in publications use:
##
## R Core Team (2017). R: A language and environment for
## statistical computing. R Foundation for Statistical Computing,
## Vienna, Austria. URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2017},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.
##
##
## To cite mice in publications use:
##
## Stef van Buuren, Karin Groothuis-Oudshoorn (2011). mice:
## Multivariate Imputation by Chained Equations in R. Journal of
## Statistical Software, 45(3), 1-67. URL
## http://www.jstatsoft.org/v45/i03/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {{mice}: Multivariate Imputation by Chained Equations in R},
## author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {45},
## number = {3},
## pages = {1--67},
## url = {http://www.jstatsoft.org/v45/i03/},
## }
##
## sysname
## "Darwin"
## release
## "17.4.0"
## version
## "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64"
## nodename
## "Davids-MacBook-Pro-2"
## machine
## "x86_64"
## login
## "davidleonardi"
## user
## "davidleonardi"
## effective_user
## "davidleonardi"
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_2.2.1 miceadds_2.9-15 mice_2.46.0 lattice_0.20-35
## [5] Amelia_1.7.4 Rcpp_0.12.15
##
## loaded via a namespace (and not attached):
## [1] lavaan_0.5-23.1097 mitools_2.3 splines_3.4.3
## [4] colorspace_1.3-2 htmltools_0.3.6 stats4_3.4.3
## [7] yaml_2.1.16 mgcv_1.8-22 rlang_0.1.6
## [10] survival_2.41-3 pillar_1.1.0 nloptr_1.0.4
## [13] foreign_0.8-69 lavaan.survey_1.1.3.1 plyr_1.8.4
## [16] mirt_1.26.3 GPArotation_2014.11-1 stringr_1.2.0
## [19] munsell_0.4.3 CDM_6.1-10 gtable_0.2.0
## [22] mvtnorm_1.0-6 coda_0.19-1 evaluate_0.10.1
## [25] knitr_1.18 permute_0.9-4 sirt_2.5-45
## [28] parallel_3.4.3 backports_1.1.2 scales_0.5.0
## [31] vegan_2.4-6 lme4_1.1-15 polycor_0.7-9
## [34] mnormt_1.5-5 digest_0.6.14 stringi_1.1.6
## [37] survey_3.33 grid_3.4.3 rprojroot_1.3-2
## [40] quadprog_1.5-5 tools_3.4.3 magrittr_1.5
## [43] lazyeval_0.2.1 tibble_1.4.1 cluster_2.0.6
## [46] pbivnorm_0.6.0 TAM_2.9-35 MASS_7.3-47
## [49] Matrix_1.2-12 minqa_1.2.4 rmarkdown_1.8
## [52] rpart_4.1-12 sfsmisc_1.1-2 nnet_7.3-12
## [55] nlme_3.1-131 compiler_3.4.3
##
## 1
## Data values written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_mean/mice_imp_mean__SPSS.txt
## Syntax file written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_mean/mice_imp_mean__SPSS.sps
Plot GV_DVLAT and VE_PDOF_TR for imputed data using Decision Tree.
xyplot(x = imp.data, data = GV_DVLAT ~ VE_PDOF_TR)
Plot GV_DVLONG and VE_PDOF_TR for imputed data using Decision Tree.
xyplot(x = imp.data, data = GV_DVLONG ~ VE_PDOF_TR)
Plot GV_DVLAT and GV_FOOTPRINT for imputed data using Decision Tree.
xyplot(x = imp.data, data = GV_DVLAT ~ GV_FOOTPRINT)
Plot GV_DVLONG and GV_FOOTPRINT for imputed data using Decision Tree.
xyplot(x = imp.data, data = GV_DVLONG ~ GV_FOOTPRINT)
Plot GV_DVLAT and VE_PDOF_TR for imputed data using Mean value.
xyplot(x = imp.data_raw_mean, data = GV_DVLAT ~ VE_PDOF_TR)
Plot GV_DVLONG and VE_PDOF_TR for imputed data using Mean value.
xyplot(x = imp.data_raw_mean, data = GV_DVLONG ~ VE_PDOF_TR)
Plot GV_DVLAT and GV_FOOTPRINT for imputed data using Mean value.
xyplot(x = imp.data_raw_mean, data = GV_DVLAT ~ GV_FOOTPRINT)
Plot GV_DVLONG and GV_FOOTPRINT for imputed data using Mean value.
xyplot(x = imp.data_raw_mean, data = GV_DVLONG ~ GV_FOOTPRINT)
Save the imputed data.
# Pull the completed data set out of the imputation object. The call is
# namespace-qualified so it still resolves to mice's complete() if another
# attached package exports a function of the same name.
data_complete <- mice::complete(imp.data)
# Path is relative to the working directory; row names are written as the
# first CSV column (write.csv default).
write.csv(
  x = data_complete,
  file = "./Data/vehicle_safety_training_imputed_data.csv"
)
Load vehicle testing dataset without missing target.
# Read the testing split (path relative to the working directory) and coerce
# the coded / categorical columns to factors so that summary() tabulates
# their levels instead of reporting numeric quantiles.
# local() keeps the helper names out of the global environment.
testing_data <- local({
  raw <- read.csv(file = "./Data/vehicle_safety_testing_data.csv")
  factor_cols <- c(
    "GV_LANES", "GV_MODELYR", "GV_WGTCDTR", "OA_BAGDEPLY",
    "OA_MAIS", "OA_MANUSE", "OA_SEX", "VE_GAD1"
  )
  raw[factor_cols] <- lapply(raw[factor_cols], as.factor)
  raw
})
summary(testing_data)
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY
## Min. : 690 Min. :-87.0000 Min. :-121.00 Min. : 13
## 1st Qu.:1350 1st Qu.: -6.0000 1st Qu.: -24.00 1st Qu.: 163
## Median :1520 Median : 0.0000 Median : -15.00 Median : 313
## Mean :1618 Mean : 0.6554 Mean : -14.58 Mean : 520
## 3rd Qu.:1840 3rd Qu.: 7.0000 3rd Qu.: -8.00 3rd Qu.: 596
## Max. :3380 Max. : 53.0000 Max. : 78.00 Max. :7411
## NA's :4 NA's :879 NA's :879 NA's :879
## GV_LANES GV_MODELYR GV_OTVEHWGT GV_SPLIMIT
## 1: 55 2001 :401 Min. : 680 Min. : 0.00
## 2:1173 2000 :396 1st Qu.:1350 1st Qu.:35.00
## 3: 524 2002 :389 Median :1550 Median :40.00
## 4: 549 2003 :333 Mean :1632 Mean :40.75
## 5: 403 2004 :316 3rd Qu.:1850 3rd Qu.:45.00
## 6: 92 2005 :253 Max. :3990 Max. :75.00
## 7: 62 (Other):770 NA's :290 NA's :26
## GV_WGTCDTR OA_AGE OA_BAGDEPLY
## Passenger Car :1760 Min. : 0.00 Deployed :1365
## Truck (<=10000 lbs.): 383 1st Qu.:26.00 Not Deployed:1493
## Truck (<=6000 lbs.) : 715 Median :37.00
## Mean :40.14
## 3rd Qu.:52.00
## Max. :93.00
## NA's :1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT
## Min. : 72.0 0:1096 0 : 321 : 20 Min. : 36.00
## 1st Qu.:163.0 1:1255 1 :2481 Female:1385 1st Qu.: 64.00
## Median :170.0 2: 245 NA's: 56 Male :1453 Median : 77.00
## Mean :171.1 3: 154 Mean : 79.43
## 3rd Qu.:178.0 4: 55 3rd Qu.: 91.00
## Max. :206.0 5: 40 Max. :150.00
## NA's :318 6: 13 NA's :293
## VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## : 115 Min. :133.0 Min. :220.0 Min. : 5.0
## Front:1627 1st Qu.:149.0 1st Qu.:262.0 1st Qu.:115.0
## Left : 489 Median :154.0 Median :272.0 Median :135.0
## Rear : 246 Mean :154.9 Mean :281.2 Mean :151.1
## Right: 381 3rd Qu.:158.0 3rd Qu.:288.0 3rd Qu.:165.0
## Max. :185.0 Max. :438.0 Max. :355.0
## NA's :39 NA's :266
## GV_FOOTPRINT
## Min. :3.190
## 1st Qu.:3.925
## Median :4.192
## Mean :4.371
## 3rd Qu.:4.558
## Max. :7.621
## NA's :39
Use mice with Decision Tree to impute missing data.
# Impute the incomplete columns of the testing data with the "cart" method,
# generating a single imputed data set (m = 1) and silencing the iteration log.
# This rebinds `imp.data`, the same name plotted and saved earlier in the file.
# NOTE(review): no `seed` is supplied (the summary reports
# "Random generator seed value: NA"), so the imputed values are not
# reproducible unless the RNG was seeded earlier -- consider passing `seed =`.
imp.data <- mice(
  data = testing_data,
  m = 1,
  method = "cart",
  printFlag = FALSE
)
summary(imp.data)
## Multiply imputed data set
## Call:
## mice(data = testing_data, m = 1, method = "cart", printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 4 879 879 879 0
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 290 26 0 1
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 318 0 56 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 293 0 39 0 266
## GV_FOOTPRINT
## 39
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "cart" "cart" "cart" "cart" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "cart" "cart" "cart" "cart" "cart"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "cart" "cart" "cart" "cart" "cart"
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "cart" "cart" "cart" "cart" "cart"
## GV_FOOTPRINT
## "cart"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_OTVEHWGT
## 1 2 3 4 7
## GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE OA_WEIGHT
## 8 10 12 14 16
## VE_ORIGAVTW VE_PDOF_TR GV_FOOTPRINT
## 18 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 0 0 0 0 0 0
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 0 0 0 0 0 0
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 0 0 0 0 0
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 0 0 0 0 0
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 0 0 0 0 0 0
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 0 0 0 0 0 0
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 0 0 0 0
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 0 0 0 0
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
Save the imputed data.
# Pull the completed testing data out of the imputation object. The call is
# namespace-qualified so it still resolves to mice's complete() if another
# attached package exports a function of the same name.
testing_data_complete <- mice::complete(imp.data)
# Path is relative to the working directory; row names are written as the
# first CSV column (write.csv default).
write.csv(
  x = testing_data_complete,
  file = "./Data/vehicle_safety_testing_imputed_data.csv"
)